
// This file constructs variables for analysis of countydistrict.dta

// Put state names in proper case (via the proper() string function)
replace state = proper(state)

// Group identifiers
// countyid: one integer code per distinct (fstate, fcounty) combination.
// The data are then declared a panel of countyid by year, with the time
// variable advancing in steps of 5.
egen countyid = group(fstate fcounty)
xtset countyid year, delta(5)

// stateyear: one integer code per distinct (fstate, year) combination
egen stateyear = group(fstate year)


// Representation Index (RI)
// For every TSV measure listed below:
//   RI = (TSV / total of TSV within stateyear) / (pop / statepop)
// The natural logs of RI and of the raw TSV measure are stored as well.
// (This header uses a two-slash comment on purpose: a three-slash marker is a
// line continuation in Stata and would splice the next line onto the comment.)
foreach m in cvap turnout edu winning cvap_swing cvap_delta cvap_u {
	// state-year total of the measure; only needed as the denominator of the share
	egen state_TSV_`m' = total(TSV_`m'), by(stateyear)
	gen RI_`m' = (TSV_`m' / state_TSV_`m') / (pop / statepop)
	drop state_TSV_`m'

	gen logRI_`m' = ln(RI_`m')
	gen logTSV_`m' = ln(TSV_`m')
}

// Psi terms (based on average share of voters)
// The incoming psi variables are sums; dividing by nreps gives the average,
// stored with a _bar suffix, and ln(1 - average) is stored with a log prefix.
foreach psi in psi_cvap psi_cvap_delta psi_cvap_u {
	gen log`psi' = ln(1 - (`psi' / nreps))
	gen `psi'_bar = `psi' / nreps

	// keep only the versions normalized by nreps; the raw summed variable is removed
	drop `psi'
}


// Border mismatch indicator
// Coded 1 when nwholerep differs from nreps and nreps is not 1; coded 0 otherwise.
// NOTE(review): Stata comparisons never evaluate to missing, so an observation with
// missing nreps and non-missing nwholerep is coded 1 here - confirm this is intended.
gen mismatch = !(nwholerep == nreps | nreps == 1)

// Elections
// state_turnout_data = 1 if at least one observation in the state has a positive,
// non-missing turnout_d, and 0 otherwise.
// The test is applied observation by observation before taking the state maximum
// because Stata treats missing as larger than any number: comparing a missing
// state-level maximum with 0 would evaluate to true and flag a state without any
// turnout data as having data.
egen state_turnout_data = max(turnout_d > 0 & !missing(turnout_d)), by(fstate)

// Close-election indicators are set to missing in states without turnout data
foreach v in anyclose_30 anyclose_30_u {
	replace `v' = . if state_turnout_data == 0
}

// Election outcomes divided by the number of representatives (nreps)
gen puncontested = uncontested / nreps
gen pdemreps = demwon / nreps
gen pincumbent = incumbentwon / nreps

// Turnout relative to cvap
gen pturnout = turnout / cvap

// Interaction of pdemreps with demmajority
gen pdemrepsxdemmajority = pdemreps * demmajority

// Population
// pop and cvap: rescale to units of 100,000, then build the squared and cubed
// terms from the rescaled variable
foreach v in pop cvap {
	gen `v'100k = `v' / 100000
	gen `v'2 = `v'100k^2
	gen `v'3 = `v'100k^3
}

// School-age variables built from age5_17: log, units of 100,000, and the
// squared and cubed terms of the rescaled variable
gen logschoolage = ln(age5_17)
gen schoolage100k = age5_17 / 100000
gen schoolage2 = schoolage100k^2
gen schoolage3 = schoolage100k^3

// Transform variables
* per capita: each listed variable divided by pop, stored with a p prefix
foreach v of varlist blacknh hispanic urban cvap bachelors belowpoverty unemployed female age65_up {
	gen p`v' = `v' / pop
}

* natural log of each listed variable, stored with a log prefix
foreach v of varlist rev_state nreps nreps_u vap pop cvap {
	gen log`v' = ln(`v')
}

// Spending outcomes
// State revenue in 1M USD and relative to pop
gen rev_state_1m = rev_state / 1e6
gen rev_statepc = rev_state / pop

// rev_own: total revenue minus the state, local-government and federal components
gen rev_own = rev_tot - rev_state - rev_locgov - rev_fed
// rev_fedloc: federal plus local-government components
gen rev_fedloc = rev_fed + rev_locgov

// For each category of state revenue: its log and its share of rev_state
foreach cat in education notedu healthhos highways transitsub houscomdev publicwelf sewerage taxrelief {
	gen logrev_state`cat' = ln(rev_state`cat')
	gen rev_state`cat'_share = rev_state`cat' / rev_state
}

// State revenue as a share of total revenue
gen prev_fromstate = rev_state / rev_tot

* Residual share: everything except education, public welfare, highways, health & hospitals
gen rev_stateother_share = 1 - rev_stateeducation_share - rev_statepublicwelf_share - rev_statehighways_share - rev_statehealthhos_share

// Logs of the remaining revenue and expenditure variables
foreach v in rev_fed rev_locgov rev_own rev_tot exp_tot exp_edu exp_notedu rev_fedloc {
	gen log`v' = ln(`v')
}

// Totals reported in 1M USD
gen exp_tot_1m = exp_tot / 1e6
gen rev_tot_1m = rev_tot / 1e6

* Measure income in 1k USD (rescales medhhinc in place, so run this only once per dataset)
replace medhhinc = medhhinc / 1000

// Number of school districts based on government organization tables (CoG)
* Note: the CoG misses all dependent education agencies (e.g., if a county operates a school district). Use the NCES-based measure instead.
// (The header above uses a two-slash comment on purpose: a three-slash marker is a
// line continuation in Stata and would splice the following line onto the comment.)

// Number of school districts from NCES
// State-level median of nschool_per_county; only used to build the indicator below
egen nschool_med = median(nschool_per_county), by(fstate)

// nschool_med1 = 1 when the state median is exactly one district per county
// 13 states: AK, FL, GA, KY, LA, MD, NC, NV, SC, TN, UT, VA, WV
gen nschool_med1 = nschool_med == 1
// HI is coded 0: state-run education department
replace nschool_med1 = 0 if state_a == "HI"
drop nschool_med

// county is equivalent to school district
gen county_school = nschool_per_county == 1 & nschool_per_county_nosplit == 1

// state_county_school = 1 if any county in the state-year has county_school equal to 1.
// county_school is always 0 or 1, so the group maximum is that indicator directly.
egen state_county_school = max(county_school), by(fstate year)



